# 目标站点 : https://xa.lianjia.com/zufang/
# 翻页站点 : https://xa.lianjia.com/zufang/pg2/#contentList
import requests
import random
import time
from openpyxl import workbook
from bs4 import BeautifulSoup


def get_data(url, headers, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        headers: Mapping of HTTP headers forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded body (``str``) when the status code is 200, the integer
        status code for any other status, or ``None`` when the request
        raised ``requests.exceptions.RequestException`` (the exception is
        printed).
    """
    # Keep the try body limited to the call that can actually raise.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(e)
        return None
    if response.status_code == 200:
        return response.text
    return response.status_code


def get_image(url, headers, timeout=10):
    """Fetch *url* with an HTTP GET and return the raw response bytes.

    Args:
        url: Address of the resource to download.
        headers: Mapping of HTTP headers forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The body as ``bytes`` when the status code is 200, the integer
        status code for any other status, or ``None`` when the request
        raised ``requests.exceptions.RequestException`` (the exception is
        printed).
    """
    # Keep the try body limited to the call that can actually raise.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(e)
        return None
    if response.status_code == 200:
        return response.content
    return response.status_code


def save_data(url, title, img, price, address):
    """Append one listing as a worksheet row, then write the workbook to disk.

    Uses the module-level ``ws`` (worksheet) and ``wb`` (workbook) objects,
    which must exist before this function is called. The workbook is saved
    after every appended row.

    Args:
        url: Link to the listing.
        title: Listing title.
        img: Link to the listing photo.
        price: Price text.
        address: Description/location text.
    """
    row = [url, title, img, price, address]
    ws.append(row)
    wb.save('西安链家-租房.xlsx')


def parse_data(data):
    """Extract the listings from one result page, print and save each one.

    Args:
        data: HTML markup of a listing page (``str`` or ``bytes``). Any other
            value -- e.g. the integer status code or ``None`` that
            ``get_data`` returns when a fetch fails -- is reported and
            skipped instead of being handed to the HTML parser.

    Returns:
        None. Each listing is printed and passed to ``save_data``.
    """
    if not isinstance(data, (str, bytes)):
        print("页面获取失败, 跳过解析:", data)
        return

    soup = BeautifulSoup(data, 'lxml')
    link_tags = soup.find_all('a', {'class': 'twoline'})  # listing links
    title_tags = soup.find_all('a', {'class': 'content__list--item--aside'})  # titles
    image_tags = soup.find_all('img', {'class': 'lazyload'})  # photos
    price_tags = soup.find_all('span', {'class': 'content__list--item-price'})  # prices
    address_tags = soup.find_all('p', {'class': 'content__list--item--des'})  # details

    # NOTE(review): the five lists are matched purely by position; a listing
    # that lacks one of these elements would shift the pairing -- confirm
    # against the live page structure.
    for link_tag, title_tag, image_tag, price_tag, address_tag in zip(
            link_tags, title_tags, image_tags, price_tags, address_tags):
        # Attributes are read directly from the already-parsed tags; there is
        # no need to serialise and re-parse each one.
        link_url = 'https://xa.lianjia.com' + link_tag['href']
        print("租房链接:",link_url)
        title = title_tag['title']
        print("房屋标题:",title)
        image = image_tag['data-src']
        print("照片链接:",image)
        price = price_tag.get_text()
        print("房屋价格:",price)
        # Collapse the multi-line description into one compact string.
        address = address_tag.get_text().strip().replace('\n', '').replace(' ', '')
        print("房屋详情:",address)
        save_data(link_url, title, image, price, address)
        print('-'*100)

if __name__ == '__main__':
    # Script entry point: scrape listing pages 1-5 and store every listing in
    # the workbook that save_data() writes to disk.
    wb = workbook.Workbook()
    ws = wb.active

    # Pool of User-Agent strings; one is picked at random per page request.
    # Built once, outside the page loop, with verbatim duplicate entries
    # removed so every string is equally likely to be chosen.
    USER_AGENT_LIST = [
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; Hot Lingo 2.0)',
        'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3451.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:57.0) Gecko/20100101 Firefox/57.0',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.2999.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.70 Safari/537.36',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36 OPR/31.0.1889.174',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.1.4322; MS-RTC LM 8; InfoPath.2; Tablet PC 2.0)',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36 OPR/55.0.2994.61',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; MATP; InfoPath.2; .NET4.0C; CIBA; Maxthon 2.0)',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.814.0 Safari/535.1',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; ja-jp) AppleWebKit/418.9.1 (KHTML, like Gecko) Safari/419.3',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0; Touch; MASMJS)',
        'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1041.0 Safari/535.21',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4093.3 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko; compatible; Swurl) Chrome/77.0.3865.120 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Goanna/4.7 Firefox/68.0 PaleMoon/28.16.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4086.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:75.0) Gecko/20100101 Firefox/75.0',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/91.0.146 Chrome/85.0.4183.146 Safari/537.36',
        'Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 VivoBrowser/8.4.72.0 Chrome/62.0.3202.84',
        'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.60',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.16; rv:83.0) Gecko/20100101 Firefox/83.0',
        'Mozilla/5.0 (X11; CrOS x86_64 13505.63.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:68.0) Gecko/20100101 Firefox/68.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36 OPR/72.0.3815.400',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Safari/537.36',
    ]

    last_page = 5
    for page in range(1, last_page + 1):
        url = f'https://xa.lianjia.com/zufang/pg{page}/#contentList'
        headers = {
            # NOTE(review): hard-coded cookie, presumably captured from a
            # browser session -- it may expire and need refreshing.
            'cookie': 'select_city=610100; lianjia_uuid=873c25e5-c4f0-4fe8-a95a-f34e838db8a5; beikeBaseData=%7B%22parentSceneId%22%3A%22%22%7D; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221961b01a55c65d-0292b93c452aa98-26011c51-1049088-1961b01a55db43%22%2C%22%24device_id%22%3A%221961b01a55c65d-0292b93c452aa98-26011c51-1049088-1961b01a55db43%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.google.com%2F%22%2C%22%24latest_referrer_host%22%3A%22www.google.com%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%7D%7D; _ga=GA1.2.708869608.1744209842; _gid=GA1.2.1950498432.1744209842; lianjia_ssid=284ba234-ed42-4ea0-b6e2-c517d910fe6f; Hm_lvt_46bf127ac9b856df503ec2dbf942b67e=1744209873; HMACCOUNT=FD8441B99FAF7E21; _jzqa=1.3329090453795071000.1744209875.1744209875.1744209875.1; _jzqc=1; _jzqx=1.1744209875.1744209875.1.jzqsr=google%2Ecom|jzqct=/.-; _jzqckmp=1; _qzjc=1; _qzja=1.1272526844.1744209875081.1744209875081.1744209875081.1744209875081.1744209902653.0.0.0.2.1; _qzjb=1.1744209875081.2.0.0.0; _qzjto=2.1.0; _jzqb=1.2.10.1744209875.1; _ga_WGKDF6B591=GS1.2.1744209883.1.1.1744209913.0.0.0; Hm_lpvt_46bf127ac9b856df503ec2dbf942b67e=1744209949; _ga_QP8TFQJ8C6=GS1.2.1744209940.1.1.1744209949.0.0.0; srcid=eyJ0Ijoie1wiZGF0YVwiOlwiMTBjYzk4YjU4YjZkOWU4MjllOGQ3YTkyYmJjMjM3YTc0NzY2NjI1OGQ4NTAyNjI1NGY1YmUwNzE0YTY1YTE4ZjQxMGM4OWRjZGJiY2E4NzFmNWNmYzE3YTMzNTQ2MGI5OWE0ZTgzMTRjMTk4MmVlOThkMzUwYzQ4NGRkNDU0ZTYzMGY5MzQ5MGU4NmE0YTExY2FjNmQ1ZTliNTIxODY2ZjNjNDQ1YTliZGMzNDg1ZGUxMDFkYmRkNGM5NmEzNWZiZmY5MzQ4YTRiNmQyY2NhYWIzY2E5MGZjMmU1NjIxNjFlMGFhYjE4ZjhhN2FhMjVkOTdjNDM2MWIyYTA2ZDM4ZFwiLFwia2V5X2lkXCI6XCIxXCIsXCJzaWduXCI6XCI4YjBlM2RhZlwifSIsInIiOiJodHRwczovL3hhLmxpYW5qaWEuY29tL3p1ZmFuZy8iLCJvcyI6IndlYiIsInYiOiIwLjEifQ==; GUARANTEE_POPUP_SHOW=true; GUARANTEE_BANNER_SHOW=true',
            'user-agent': random.choice(USER_AGENT_LIST),
            'content-type':'text/html; charset=UTF-8'
        }
        t = random.randint(8, 15)
        data = get_data(url, headers)
        # get_data returns an int status code or None when the fetch fails;
        # skip such pages so one bad response does not abort the whole run.
        if isinstance(data, str):
            parse_data(data)
        else:
            print(f"第{page}页获取失败: {data}")
        # Pause between requests; nothing follows the last page, so no wait.
        if page < last_page:
            print(f"休眠{t}秒")
            time.sleep(t)
